rm(list =ls())
options(scipen=999)
gc()
packages <-c("tidyverse","estimatr","plm","stargazer",
             "fastDummies","ICCbin","ihs","readstata13","xtable",
             "arm")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR DIRECTORY HERE")

## load data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

# load covariates
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas", "Frac48", "coal_sqm", "minerals", "partition", "priests_continuous")], by = c("locality"), all.x = T)
data$region <- as.numeric(as.factor(data$region))


###################
### First stage ###
###################

# subset to years with observations:
data_t <- data[which(data$year>1974 & data$year<1980),]

## rearrange controls 

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# create industrialization index
data_t$coal_sqm <- scale(data_t$coal_sqm)
data_t$minerals <- scale(data_t$minerals)
data_t$industrialization_index <- rowSums(data_t[,c("coal_sqm","minerals")], na.rm = F)
data_t$industrialization_index[data_t$industrialization_index == "NaN"] = NA  
data_t$industrialization_index <- scale(data_t$industrialization_index)



#################################
#### First stage regressions ####
#################################

#### OLS when averaging over years 1975-79

# aggregate data
data_agg <- aggregate(data_t[,c("locality_numeric", "region", "commanders", "priests_continuous", "wealth_index", "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", "industrialization_index")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)

# regressions
m_first_stage_bare <- lm(commanders ~ priests_continuous, data = data_agg)

m_first_stage_controls <- lm(commanders ~ priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + factor(region), data = data_agg)


#### OLS when averaging over years 1980-86

# subset to years with observations:
data_t_late <- data[which(data$year>1979),]

# average across years
data_agg_late <- aggregate(data_t_late[ ,c("locality_numeric", "region", "commanders", "priests_continuous")], by=list(Category=data_t_late$locality_numeric), FUN=mean, na.rm=T)

# attach covariates from above
data_agg_late <- merge(data_agg_late, data_agg[,c("locality_numeric", "wealth_index", "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", "industrialization_index")], by = "locality_numeric", all.x = T)

# regressions
m_first_stage_bare_late <- lm(commanders ~ priests_continuous, data = data_agg_late)

m_first_stage_controls_late <- lm(commanders ~ priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + factor(region), data = data_agg_late)


## save as tex
stargazer(m_first_stage_bare, m_first_stage_controls, m_first_stage_bare_late, m_first_stage_controls_late,
          dep.var.labels = c("Secret police officers"),
          style = "qje",
          covariate.labels = c("Corrupt priests", "Wealth", "State capacity", "Ethnic diversity", 
                               "Russian occupation", "Industrialization"),
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant", "factor"),
          omit.stat = c("rsq", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Fixed effects", "No", "Yes", "No", "Yes"),
                           c("Controls", "No", "Yes", "No", "Yes")),
          out = "PUT YOUR FILEPATH HERE")




